{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Basic Classification Example with TensorFlow\n",
    "Supervised problem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Import or generate data.\n",
    "All of our machine learning algorithms will depend on data. In this book we will either generate data or use an outside source of data. Sometimes it is better to rely on generated data because we will want to know the expected outcome.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     area  bathrooms\n",
      "0  2104.0        3.0\n",
      "1  1600.0        3.0\n",
      "2  2400.0        3.0\n",
      "3  1416.0        2.0\n",
      "4  3000.0        4.0\n",
      "5  1985.0        4.0\n",
      "6  1534.0        3.0\n",
      "7  1427.0        3.0\n",
      "8  1380.0        3.0\n",
      "9  1494.0        3.0\n"
     ]
    }
   ],
   "source": [
    "dataframe = pd.read_csv('input/data.csv')\n",
    "\n",
    "dataframe = dataframe.drop(['index', 'price', 'sq_price'],axis=1)\n",
    "dataframe = dataframe[0:10]\n",
    "print (dataframe)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>area</th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>y1</th>\n",
       "      <th>y2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2104.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1600.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2400.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1416.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3000.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1985.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1534.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1427.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1380.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1494.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     area  bathrooms  y1  y2\n",
       "0  2104.0        3.0   1   0\n",
       "1  1600.0        3.0   0   1\n",
       "2  2400.0        3.0   1   0\n",
       "3  1416.0        2.0   1   0\n",
       "4  3000.0        4.0   0   1\n",
       "5  1985.0        4.0   1   0\n",
       "6  1534.0        3.0   1   0\n",
       "7  1427.0        3.0   0   1\n",
       "8  1380.0        3.0   0   1\n",
       "9  1494.0        3.0   1   0"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Add labels\n",
    "# 1 is good buy and 0 is bad buy\n",
    "dataframe.loc[:, ('y1')]  = [1,0,1,1,0,1,1,0,0,1]\n",
    "\n",
    "#y2 is the negation of the y1\n",
    "dataframe.loc[:, ('y2')] = dataframe['y1'] == 0\n",
    "\n",
    "# convert true/false value to 1s and 0s\n",
    "dataframe.loc[:, ('y2')] = dataframe['y2'].astype(int)\n",
    "dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[  2.10400000e+03,   3.00000000e+00],\n",
       "       [  1.60000000e+03,   3.00000000e+00],\n",
       "       [  2.40000000e+03,   3.00000000e+00],\n",
       "       [  1.41600000e+03,   2.00000000e+00],\n",
       "       [  3.00000000e+03,   4.00000000e+00],\n",
       "       [  1.98500000e+03,   4.00000000e+00],\n",
       "       [  1.53400000e+03,   3.00000000e+00],\n",
       "       [  1.42700000e+03,   3.00000000e+00],\n",
       "       [  1.38000000e+03,   3.00000000e+00],\n",
       "       [  1.49400000e+03,   3.00000000e+00]])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "inputX = dataframe.loc[:, ['area','bathrooms']].as_matrix()\n",
    "inputY = dataframe.loc[:, ['y1','y2']].as_matrix()\n",
    "inputX"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Set algorithm parameters.\n",
    "Our algorithms usually have a set of parameters that we hold constant throughout the procedure. For example, this can be the number of iterations, the learning rate, or other fixed parameters of our choosing. It is considered good form to initialize these together so the reader or user can easily find them.\n",
    "learning_rate = 0.01  iterations = 1000\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Hyperparameters\n",
    "learning_rate = 0.00001\n",
    "training_epochs = 2000 #iterations\n",
    "display_steps = 50\n",
    "n_samples = inputY.size"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Initialize variables and placeholders.\n",
    "Tensorflow depends on us telling it what it can and cannot modify. Tensorflow will modify the variables during optimization to minimize a loss function. To accomplish this, we feed in data through placeholders. We need to initialize both of these, variables and placeholders with size and type, so that Tensorflow knows what to expect.\n",
    "a_var = tf.constant(42)  x_input = tf.placeholder(tf.float32, [None, input_size])  y_input = tf.placeholder(tf.fload32, [None, num_classes])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Create our computation graph/ Neural Network\n",
    "# for feature input tensors, none means any numbers of examples\n",
    "# placgeholder are gateways for data into our computation\n",
    "x = tf.placeholder(tf.float32, [None,2])\n",
    "\n",
    "#create weights\n",
    "# 2 X 2 float matrix, that we'll keep updateing through the training process\n",
    "w = tf.Variable(tf.zeros([2,2]))\n",
    "\n",
    "#create bias , we have 2 bias since we have two features\n",
    "b = tf.Variable(tf.zeros([2]))\n",
    "\n",
    "y_values = tf.add(tf.matmul(x,w),b)\n",
    "\n",
    "y = tf.nn.softmax(y_values)\n",
    "\n",
    "# For trainign purpose, we'll also feed a matrix of labels\n",
    "y_ = tf.placeholder(tf.float32, [None,2]) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Define the model structure.\n",
    "After we have the data, and initialized our variables and placeholders, we have to define the model. This is done by building a computational graph. We tell Tensorflow what operations must be done on the variables and placeholders to arrive at our model predictions. We talk more in depth about computational graphs in chapter two, section one of this book.\n",
    "y_pred = tf.add(tf.mul(x_input, weight_matrix), b_matrix)\n",
    "### Declare the loss functions.\n",
    "After defining the model, we must be able to evaluate the output. This is where we declare the loss function. The loss function is very important as it tells us how far off our predictions are from the actual values. The different types of loss functions are explored in greater detail in chapter two, section five.\n",
    "loss = tf.reduce_mean(tf.square(y_actual – y_pred))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Cost function: Mean squared error\n",
    "cost = tf.reduce_sum(tf.pow(y_ - y, 2))/(2*n_samples)\n",
    "# Gradient Descent \n",
    "optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Initialize and train the model.\n",
    "Now that we have everything in place, we create an instance or our graph and feed in the data through the placeholders and let Tensorflow change the variables to better predict our training data. Here is one way to initialize the computational graph.\n",
    "with tf.Session(graph=graph) as session:\n",
    " ...\n",
    " session.run(...)\n",
    " ...\n",
    "Note that we can also initiate our graph with\n",
    "session = tf.Session(graph=graph)  session.run(…)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#Initialize variables and tensorflow session\n",
    "init = tf.global_variables_initializer()\n",
    "sess = tf.Session()\n",
    "sess.run(init)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Evaluate the model.\n",
    "Once we have built and trained the model, we should evaluate the model by looking at how well it does on new data through some specified criteria.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training step: 0000 Cost: : 0.145388320\n",
      "Training step: 0050 Cost: : 0.131080121\n",
      "Training step: 0100 Cost: : 0.138472199\n",
      "Training step: 0150 Cost: : 0.121150449\n",
      "Training step: 0200 Cost: : 0.121230125\n",
      "Training step: 0250 Cost: : 0.121053793\n",
      "Training step: 0300 Cost: : 0.154228136\n",
      "Training step: 0350 Cost: : 0.120650433\n",
      "Training step: 0400 Cost: : 0.175630525\n",
      "Training step: 0450 Cost: : 0.178696260\n",
      "Training step: 0500 Cost: : 0.154529601\n",
      "Training step: 0550 Cost: : 0.151964471\n",
      "Training step: 0600 Cost: : 0.177694350\n",
      "Training step: 0650 Cost: : 0.177315921\n",
      "Training step: 0700 Cost: : 0.122371890\n",
      "Training step: 0750 Cost: : 0.131565526\n",
      "Training step: 0800 Cost: : 0.147059232\n",
      "Training step: 0850 Cost: : 0.134536177\n",
      "Training step: 0900 Cost: : 0.123008087\n",
      "Training step: 0950 Cost: : 0.179653764\n",
      "Training step: 1000 Cost: : 0.130259484\n",
      "Training step: 1050 Cost: : 0.157459542\n",
      "Training step: 1100 Cost: : 0.161322162\n",
      "Training step: 1150 Cost: : 0.148553342\n",
      "Training step: 1200 Cost: : 0.179500312\n",
      "Training step: 1250 Cost: : 0.124364354\n",
      "Training step: 1300 Cost: : 0.173981547\n",
      "Training step: 1350 Cost: : 0.157309309\n",
      "Training step: 1400 Cost: : 0.168647319\n",
      "Training step: 1450 Cost: : 0.135446191\n",
      "Training step: 1500 Cost: : 0.123318270\n",
      "Training step: 1550 Cost: : 0.141175568\n",
      "Training step: 1600 Cost: : 0.144161612\n",
      "Training step: 1650 Cost: : 0.174333096\n",
      "Training step: 1700 Cost: : 0.149039477\n",
      "Training step: 1750 Cost: : 0.160516173\n",
      "Training step: 1800 Cost: : 0.121821262\n",
      "Training step: 1850 Cost: : 0.156590730\n",
      "Training step: 1900 Cost: : 0.132786125\n",
      "Training step: 1950 Cost: : 0.159663841\n",
      "Optimization Done!\n",
      "Training cost= 0.148959 W= [[-0.00015635  0.00015635]\n",
      " [-0.00024458  0.00024458]] b= [ -2.61498317e-05   2.61498262e-05] \n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Now for the actual training \n",
    "for i in range(training_epochs):\n",
    "    #Take a gradient descent step using our input and labels \n",
    "    sess.run(optimizer, feed_dict={x: inputX, y_:inputY})\n",
    "     \n",
    "    # Display logs per epoch step   \n",
    "    if (i)  % display_steps == 0:\n",
    "        cc = sess.run(cost, feed_dict={x: inputX, y_: inputY})\n",
    "        print(\"Training step:\", '%04d' % (i), \"Cost: :\", \"{:.9f}\".format(cc) )\n",
    "    \n",
    "print(\"Optimization Done!\")\n",
    "training_cost = sess.run(cost, feed_dict={x: inputX, y_: inputY})\n",
    "print (\"Training cost=\", training_cost, \"W=\", sess.run(w), \"b=\", sess.run(b), '\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Predict new outcomes.\n",
    "It is also important to know how to make predictions on new, unseen, data. We can do this with all of our models, once we have them trained."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.34086418,  0.65913582],\n",
       "       [ 0.37710753,  0.62289244],\n",
       "       [ 0.3203845 ,  0.6796155 ],\n",
       "       [ 0.39083183,  0.60916823],\n",
       "       [ 0.28087693,  0.71912307],\n",
       "       [ 0.3491624 ,  0.6508376 ],\n",
       "       [ 0.38196757,  0.6180324 ],\n",
       "       [ 0.38989681,  0.61010319],\n",
       "       [ 0.39339849,  0.60660154],\n",
       "       [ 0.38492468,  0.61507535]], dtype=float32)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sess.run(y, feed_dict={x:inputX})"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
